curl -LO https://hf-mirror.com/second-state/DeepSeek-R1-Distill-Llama-8B-GGUF/resolve/main/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf
curl -LO https://hf-mirror.com/second-state/Nomic-embed-text-v1.5-Embedding-GGUF/resolve/main/nomic-embed-text-v1.5.f16.gguf
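
### The launch script below expects both GGUF files under /data/llm/ (a path
### taken from the variables defined further down; adjust it if yours differs).
### If curl downloaded them into the current directory, move them into place:
mkdir -p /data/llm
mv DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf nomic-embed-text-v1.5.f16.gguf /data/llm/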

### Server-side program
wasm_server="llama-api-server.wasm"

### Model files
default_model_file="/data/llm/DeepSeek-R1-Distill-Llama-8B-Q5_K_M.gguf"
embedding_model_file="/data/llm/nomic-embed-text-v1.5.f16.gguf"
prompt_template="llama-3-chat,embedding"
model_name="DeepSeek-R1-Distill-Llama-8B,nomic-embed-text-v1.5.f16"

### Print info
printf "Model files: $default_model_file, $embedding_model_file, prompt template: $prompt_template, wasm server: $wasm_server \n\n"

printf "Starting the server ...\n\n"

wasmedge --dir .:. \
  --nn-preload default:GGML:AUTO:${default_model_file} \
  --nn-preload embedding:GGML:AUTO:${embedding_model_file} \
  $wasm_server -p $prompt_template \
  --model-name $model_name \
  --ctx-size 8192,8192 \
  --batch-size 128,8192 \
  --log-prompts --log-stat
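
### Quick smoke test once the server is running. This is only a sketch: it
### assumes the server's default listen address 0.0.0.0:8080 (pass --socket-addr
### to the wasmedge command above to change it); the model names match the
### --model-name values set earlier.
curl -s http://localhost:8080/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
        "model": "DeepSeek-R1-Distill-Llama-8B",
        "messages": [{"role": "user", "content": "Hello"}]
      }'

### The embedding model loaded via the second --nn-preload is exposed through
### the OpenAI-compatible embeddings endpoint.
curl -s http://localhost:8080/v1/embeddings \
  -H 'Content-Type: application/json' \
  -d '{"model": "nomic-embed-text-v1.5.f16", "input": ["hello world"]}'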
